* This script contains the code to replicate Figures 3 and 4 in Ductor, L., Fafchamps, M., Goyal, S. and M. van der Leij, "Social Networks and Research Output", The Review of Economics and Statistics.

/* Session setup: disable output paging, open the log, load the estimation sample. */
set more off

/* Close any log left open by a previous run that stopped before reaching
   -log close-; otherwise -log using- aborts with "log file already open".
   -capture- makes this a no-op when no log is open. */
capture log close
log using Figures3-4.log, replace
use prodndnp_fullsample.dta, clear

set matsize 800


/* Out-of-sample forecast comparison, one pass per value of t (6 to 20).
   For each t, four OLS models of lprodf3 are estimated on group==1 and used to
   predict lprodf3 for group==2. For each model the script records the
   in-sample N, R2 and RMSE, the out-of-sample RMSE, and (Models 1-3) the
   percentage RMSE change relative to a reference model. One column per t is
   written to careertimeM0 ... careertimeM3 through outreg2.

   Reference models, as coded below:
     Model 1 is compared with Model 0;
     Model 2 and Model 3 are compared with Model 1. */

/* The outreg2 calls rely on columns accumulating across iterations. To keep a
   re-run from appending to tables left over from an earlier run, the first
   iteration overwrites the files and later iterations append. */
local outmode replace

forval i = 6(1)20 {

    /*MODEL 0*/
    /*OLS out-of-sample program*/
    reg lprodf3 y6-y27 nopapers lcprodl5 if group==1 & t==`i'
    qui scalar n1=e(N)                 /*in-sample number of observations*/
    estat ic
    scalar rmseols=e(rmse)             /*in-sample RMSE*/
    predict ybols if group==2 & t==`i' & lprodf3<., xb
    scalar r2i1ols= e(r2)              /*in-sample R2 of this model*/
    qui gen sqerrolsM0= (lprodf3-ybols)^2 if ybols<.
    /*sum of squared prediction errors; the same value on every predicted row*/
    quietly egen peols=sum(sqerrolsM0) if ybols<.
    quietly sum peols
    quietly scalar rssols=r(max)       /*total squared prediction error*/
    quietly scalar nols=r(N)           /*number of predicted observations (group 2)*/
    scalar rmseo2olsM0=(rssols/nols)^0.5   /*RMSE out of sample*/
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM0 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    drop peols ybols
    /* NOTE(review): the local macro varlist is not defined anywhere in the
       visible script, so drop() receives an empty list here and in the three
       outreg2 calls below - confirm this is intended. */
    outreg2 using careertimeM0, `outmode' drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM0, t,`i') excel


    /*MODEL 1*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y6-y27 nopapers lcprod5 lcprodl5 if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb
    scalar r2i1ols= e(r2)              /*in-sample R2 of this model*/
    qui gen sqerrolsM1= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM1) if ybols<.
    quietly sum peols
    quietly scalar rssols=r(max)
    quietly scalar nols=r(N)
    scalar rmseo2olsM1=(rssols/nols)^0.5   /*RMSE out of sample*/
    /*observation-level squared-error difference, Model 1 minus Model 0,
      regressed on a constant only*/
    gen diffsqerrlM1= sqerrolsM1-sqerrolsM0
    newey diffsqerrlM1 if ybols<., lag(0)
    /*percentage reduction in out-of-sample RMSE relative to Model 0*/
    scalar diffrmseoutM1=((rmseo2olsM0-rmseo2olsM1)/rmseo2olsM0)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM1 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    di diffrmseoutM1 "RMSE % difference between Model 1 and Model 0"
    drop peols ybols
    outreg2 using careertimeM1, `outmode' drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM1, RMSE % diff, diffrmseoutM1, t,`i') excel


    /*MV Model 2*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y6-y27 lcprodl5 nopapers lnetprod5y lnetprod25y degree5y degree25y gc5y lbet5y lclos5y neiq1fs5y if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb
    scalar r2i1ols= e(r2)              /*in-sample R2 of this model*/
    qui gen sqerrolsM2= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM2) if ybols<.
    quietly sum peols
    quietly scalar rssols=r(max)
    quietly scalar nols=r(N)
    scalar rmseo2olsM2=(rssols/nols)^0.5   /*RMSE out of sample*/
    /*squared-error difference, Model 2 minus Model 1*/
    gen diffsqerrlM2= sqerrolsM2-sqerrolsM1
    newey diffsqerrlM2 if ybols<., lag(0)
    /*percentage reduction in out-of-sample RMSE relative to Model 1*/
    scalar diffrmseoutM2=((rmseo2olsM1-rmseo2olsM2)/rmseo2olsM1)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM2 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    /*the reference model is Model 1, matching the scalar computed above*/
    di diffrmseoutM2 "RMSE % difference between Model 2 and Model 1"
    drop peols ybols
    outreg2 using careertimeM2, `outmode' drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM2, RMSE % diff, diffrmseoutM2, t,`i') excel


    /*MV Model 3*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y6-y27 nopapers lcprod5 lcprodl5 lnetprod5y lnetprod25y degree5y degree25y gc5y lbet5y lclos5y neiq1fs5y if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb
    scalar r2i1ols= e(r2)              /*in-sample R2 of this model*/
    qui gen sqerrolsM3= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM3) if ybols<.
    quietly sum peols
    quietly scalar rssols=r(max)
    quietly scalar nols=r(N)
    scalar rmseo2olsM3=(rssols/nols)^0.5   /*RMSE out of sample*/
    /*squared-error difference, Model 3 minus Model 1*/
    gen diffsqerrlM3= sqerrolsM3-sqerrolsM1
    newey diffsqerrlM3 if ybols<., lag(0)
    /*percentage reduction in out-of-sample RMSE relative to Model 1*/
    scalar diffrmseoutM3=((rmseo2olsM1-rmseo2olsM3)/rmseo2olsM1)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM3 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    di diffrmseoutM3 "RMSE % difference between Model 3 and Model 1"
    drop peols ybols
    outreg2 using careertimeM3, `outmode' drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM3, RMSE % diff, diffrmseoutM3, t,`i') excel

    /*remove the per-iteration work variables so the next t can recreate them*/
    drop sqerrolsM0 sqerrolsM1 sqerrolsM2 sqerrolsM3 diffsqerrlM1 diffsqerrlM2 diffsqerrlM3

    /*every later iteration adds a column to the tables started above*/
    local outmode append
}
log close

/* Data editing: turn the Model 0 outreg2 text table into a dataset with one
   observation per table column and one variable per retained statistic. */
insheet using "careertimeM0.txt", clear

/* Keep only text rows 29 and 31-35 (presumably the statistics rows - the
   offsets are hard-coded to the table layout). */
generate n = _n
keep if n == 29 | inrange(n, 31, 35)

/* The first column is discarded; the second holds the row labels. */
drop v1
rename v2 variable

/* Long form: one record per (row label, table column). */
reshape long v, i(variable)
encode variable, generate(varlabel)

/* Convert the cell text to numbers, ignoring thousands separators. */
destring v, generate(v2) ignore(,)
drop v
rename v2 v
drop variable n

/* Wide form: one record per table column, one variable per encoded row label. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
label variable obs "Observations"
rename v2 rm0
label variable rm0 "R-squared M0"
rename v3 rmsem0
label variable rmsem0 "RMSE M0"
rename v4 nin
label variable nin "n_in"
rename v5 nos
label variable nos "n_os"
rename v6 t
label variable t "t"

save careertimeM0, replace


/* Model 1 table: import the outreg2 text output and rebuild it as one
   observation per table column, one variable per retained statistic. */
insheet using "careertimeM1.txt", clear

/* Keep text rows 5-11 only (offsets are hard-coded to the table layout). */
generate n = _n
keep if inrange(n, 5, 11)

/* The first column holds the row labels. */
rename v1 variable

/* Long form: one record per (row label, table column). */
reshape long v, i(variable)
encode variable, generate(varlabel)

/* Convert the cell text to numbers, ignoring thousands separators. */
destring v, generate(v2) ignore(,)
drop v
rename v2 v
drop variable n

/* Wide form: one record per table column, one variable per encoded row label. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
label variable obs "Observations"
rename v2 rm1
label variable rm1 "R-squared M1"
rename v3 rmsem1
label variable rmsem1 "RMSE M1"
rename v4 rmsediffm1m0
label variable rmsediffm1m0 "RMSE % Diff. M1 M0"
rename v5 nin
label variable nin "n_in"
rename v6 nos
label variable nos "n_os"
rename v7 t
label variable t "t"

save careertimeM1, replace


/* Model 2 table: import the outreg2 text output and rebuild it as one
   observation per table column, one variable per retained statistic. */
insheet using "careertimeM2.txt", clear

/* Keep text rows 5-11 only (offsets are hard-coded to the table layout). */
generate n = _n
keep if inrange(n, 5, 11)

/* The first column holds the row labels. */
rename v1 variable

/* Long form: one record per (row label, table column). */
reshape long v, i(variable)
encode variable, generate(varlabel)

/* Convert the cell text to numbers, ignoring thousands separators. */
destring v, generate(v2) ignore(,)
drop v
rename v2 v
drop variable n

/* Wide form: one record per table column, one variable per encoded row label. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
label variable obs "Observations"
rename v2 rm2
label variable rm2 "R-squared MV2"
rename v3 rmsem2
label variable rmsem2 "RMSE MV2"
rename v4 rmsediffm2m1
label variable rmsediffm2m1 "RMSE % Diff. MV2 M1"
rename v5 nin
label variable nin "n_in"
rename v6 nos
label variable nos "n_os"
rename v7 t
label variable t "t"

save careertimeM2, replace

/* Model 3 table: import the outreg2 text output and rebuild it as one
   observation per table column, one variable per retained statistic.
   The resulting dataset is saved and also left in memory. */
insheet using "careertimeM3.txt", clear

/* Keep text rows 5-11 only (offsets are hard-coded to the table layout). */
generate n = _n
keep if inrange(n, 5, 11)

/* The first column holds the row labels. */
rename v1 variable

/* Long form: one record per (row label, table column). */
reshape long v, i(variable)
encode variable, generate(varlabel)

/* Convert the cell text to numbers, ignoring thousands separators. */
destring v, generate(v2) ignore(,)
drop v
rename v2 v
drop variable n

/* Wide form: one record per table column, one variable per encoded row label. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
label variable obs "Observations"
rename v2 rm3
label variable rm3 "R-squared MV3"
rename v3 rmsem3
label variable rmsem3 "RMSE MV3"
rename v4 rmsediffm3m1
label variable rmsediffm3m1 "RMSE % Diff. MV3 M1"
rename v5 nin
label variable nin "n_in"
rename v6 nos
label variable nos "n_os"
rename v7 t
label variable t "t"

save careertimeM3, replace

/* Combine the per-model results on t. The dataset in memory at this point is
   used as the master; the other three model files are joined to it in turn. */
foreach model in M0 M1 M2 {
    joinby t using careertime`model'
}

/* Only the RMSE series and the RMSE % differences are kept for the figures. */
drop obs nin nos rm0 rm1 rm2 rm3
order t rmsem0 rmsem1 rmsem2 rmsem3 rmsediffm1m0 rmsediffm2m1 rmsediffm3m1
save Careertime, replace

/*********************Creating the graphs, figure 3 and 4***********************************/
/* -line- connects points in dataset order. The sort option orders each series
   by t before drawing, so the lines do not depend on whatever row order the
   preceding joins happened to leave in memory. */

/* Figure 3: out-of-sample RMSE of Models 0-3 against t. */
twoway (line rmsem0 t, sort) (line rmsem1 t, sort) (line rmsem2 t, sort)  (line rmsem3 t, sort), xlabel(5(5)20) graphregion(fcolor(white) lcolor(white))
graph export "Figure3.eps", as(eps) preview(off) replace

/* Figure 4: RMSE % difference of Models 2 and 3 relative to Model 1, against t. */
twoway (line rmsediffm2m1 t, sort) (line rmsediffm3m1 t, sort), xlabel(5(5)20) graphregion(fcolor(white) lcolor(white))
graph export "Figure4.eps", as(eps) preview(off) replace




